################################################################################
#                                                                              #
#   Replication code: Predictors of support for a ban on killer robots:        #   
#                     Preventive Arms Control as an Anticipatory Response to   #
#                     Military Innovation                                      #
#                                                                              #
#   Author: Ondrej Rosendorf                                                   #
#           Department of International Relations, Charles University          #
#                                                                              #
#   Contact: ondrej.rosendorf@fsv.cuni.cz                                      #
#                                                                              #
#   R version: 3.6.2                                                           #
#                                                                              #
################################################################################

######## PREPARING DATA ########

#### Clear workspace, set working directory ####

rm(list=ls())
setwd(dir = "C:/")

#### Load packages ####

pks <- c("DescTools", "ordinal", "readr", "stargazer", "tidyverse", "interplot", "SDMTools", "ROCR", "corrplot", "nnet")

lapply(pks, require, character.only=TRUE)

#### Load data ####

mydata <- read_tsv(file = "laws data.txt", 
                   col_types = c("ccfffnnnfffiiffiiinifffiiinniiiiiiiifffffiinn"))

# Releveling factor variables

mydata$BAN_1 <- fct_relevel(mydata$BAN_1, c("0", "1"))
mydata$BAN_2 <- fct_relevel(mydata$BAN_2, c("0", "0.5", "1"))
mydata$UAV_1 <- fct_relevel(mydata$UAV_1, c("0", "1"))
mydata$UAV_2 <- fct_relevel(mydata$UAV_2, c("0", "1"))
mydata$TOP <- fct_relevel(mydata$TOP, c("0", "1"))
mydata$RIV_1 <- fct_relevel(mydata$RIV_1, c("0", "1"))
mydata$RIV_2 <- fct_relevel(mydata$RIV_2, c("0", "1"))
mydata$DEM <- fct_relevel(mydata$DEM, c("0", "1"))
mydata$ANO <- fct_relevel(mydata$ANO, c("0", "1"))
mydata$AUT <- fct_relevel(mydata$AUT, c("0", "1"))
mydata$MPS <- fct_relevel(mydata$MPS, c("0", "1"))
mydata$NAM <- fct_relevel(mydata$NAM, c("0", "1"))
mydata$LDC <- fct_relevel(mydata$LDC, c("0", "1"))
mydata$CCW_3 <- fct_relevel(mydata$CCW_3, c("0", "1"))
mydata$CCW_4 <- fct_relevel(mydata$CCW_4, c("0", "1"))

# Normalizing skewed distribution

mydata$GDP_log <- log10(mydata$GDP)
mydata$MIL_log <- log10(mydata$MIL+1)
mydata$ARM_log <- log10(mydata$ARM+1)
mydata$TER_log <- log10(mydata$TER+1)
mydata$HCT_log <- log10(mydata$HCT+1)

# Creating new variables
mydata$POL_1 <- (mydata$POL+11)
mydata$POL_2 <- (mydata$POL_1^2)
mydata$WFI_1 <- (mydata$WFI+1)
mydata$WFI_2 <- (mydata$WFI_1^2)

######## DESCRIPTIVE STATISTICS AND CORRELATION MATRIX ########

# Subsetting data for descriptive statistics
mydata_descriptive <- read_tsv(file = "laws data.txt", 
                               col_types = c("ccnncnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn"))
myvars <- c("BAN_1", "ECI", "GDP", "UAV_1", "DIS", "TER", "RIV_1", "POL", "HRR_1", "CCW_2", "LDC")
mydata_descriptive <- mydata_descriptive[myvars]

# Summary statistics
summary(mydata_descriptive)

# Correlation matrix
corplot <- cor(mydata_descriptive, use = "pairwise.complete.obs")

# Output for descriptive statistics and correlation matrix
mydata_descriptive <- as.data.frame(mydata_descriptive)

stargazer(mydata_descriptive, out="Descriptive.htm", type = "html", title="Descriptive statistics")
stargazer(corplot, out="Correlation.htm", type = "html", title="Correlation matrix")

######## MAIN ANALYSIS ########

#### Running logit models ####

# Baseline model
M1 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M1)
PseudoR2(M1, which = "McFadden")

# Model with LDC control
M2 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M2)
PseudoR2(M2, which = "McFadden")

# Model with GDP per capita
M3 <- glm(BAN_1 ~ GDP_log + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M3)
PseudoR2(M3, which = "McFadden")

# Model with rivalry
M4 <- glm(BAN_1 ~ ECI + UAV_1 + RIV_1 + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M4)
PseudoR2(M4, which = "McFadden")

# Model with GDP per capita and rivalry
M5 <- glm(BAN_1 ~ GDP_log + UAV_1 + RIV_1 + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M5)
PseudoR2(M5, which = "McFadden")

#### Creating regression table ####

stargazer(M1, M2, M3, M4, M5, out="Regression table 1.htm", type = "html", title="Results of logit regression",
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(.1, .05, .01, .001))

#### Extracting predicted probabilities ####

predictions1 <- predict(M1, type="response")
predictions2 <- predict(M2, type="response")
predictions3 <- predict(M3, type="response")
predictions4 <- predict(M4, type="response")
predictions5 <- predict(M5, type="response")

stargazer(predictions1, predictions2, predictions3, predictions4, predictions5, out="Predicted probabilities.htm", type = "html")

#### Confusion matrices for models ####

# Matching data observations with models
mydata_conf1 <- mydata[complete.cases(mydata[,c("ECI", "POL")]),]
actual_values_1 <- mydata_conf1$BAN_1

mydata_conf2 <- mydata[complete.cases(mydata[,c("ECI", "POL")]),]
actual_values_2 <- mydata_conf2$BAN_1

mydata_conf3 <- mydata[complete.cases(mydata[,c("GDP", "POL")]),]
actual_values_3 <- mydata_conf3$BAN_1

mydata_conf4 <- mydata[complete.cases(mydata[,c("ECI", "POL")]),]
actual_values_4 <- mydata_conf4$BAN_1

mydata_conf5 <- mydata[complete.cases(mydata[,c("GDP", "POL")]),]
actual_values_5 <- mydata_conf5$BAN_1

# Setting threshold at 0.5
threshold = 0.5

# Model 1
predicted_values_1 <- ifelse(predict(M1, type="response")>threshold,1,0)
confusion.matrix(actual_values_1, predicted_values_1)
accuracy(actual_values_1, predicted_values_1)

# Model 2
predicted_values_2 <- ifelse(predict(M2, type="response")>threshold,1,0)
confusion.matrix(actual_values_2, predicted_values_2)
accuracy(actual_values_2, predicted_values_2)

# Model 3
predicted_values_3 <- ifelse(predict(M3, type="response")>threshold,1,0)
confusion.matrix(actual_values_3, predicted_values_3)
accuracy(actual_values_3, predicted_values_3)

# Model 4
predicted_values_4 <- ifelse(predict(M4, type="response")>threshold,1,0)
confusion.matrix(actual_values_4, predicted_values_4)
accuracy(actual_values_4, predicted_values_4)

# Model 5
predicted_values_5 <- ifelse(predict(M5, type="response")>threshold,1,0)
confusion.matrix(actual_values_5, predicted_values_5)
accuracy(actual_values_5, predicted_values_5)

#### ROC curve for models 1-5 ####

# Setting threshold at 0.5
threshold = 0.5

# Model 1
pred <- predict(M1, type="response")
ROCRpred <- prediction(pred, mydata_conf1$BAN_1)
ROCRperf <- performance(ROCRpred, "tpr", "fpr")
plot(ROCRperf)
plot(ROCRperf, colorize=TRUE)
plot(ROCRperf, colorize=TRUE, print.cutoffs.at=seq(0,1,0.1))
plot(ROCRperf, colorize=TRUE, print.cutoffs.at=seq(0,1,0.1), text.adj=c(-0.2,1.7))

# Model 2
pred <- predict(M2, type="response")
ROCRpred <- prediction(pred, mydata_conf2$BAN_1)
ROCRperf <- performance(ROCRpred, "tpr", "fpr")
plot(ROCRperf)
plot(ROCRperf, colorize=TRUE)
plot(ROCRperf, colorize=TRUE, print.cutoffs.at=seq(0,1,0.1))
plot(ROCRperf, colorize=TRUE, print.cutoffs.at=seq(0,1,0.1), text.adj=c(-0.2,1.7))

# Model 3
pred <- predict(M3, type="response")
ROCRpred <- prediction(pred, mydata_conf3$BAN_1)
ROCRperf <- performance(ROCRpred, "tpr", "fpr")
plot(ROCRperf)
plot(ROCRperf, colorize=TRUE)
plot(ROCRperf, colorize=TRUE, print.cutoffs.at=seq(0,1,0.1))
plot(ROCRperf, colorize=TRUE, print.cutoffs.at=seq(0,1,0.1), text.adj=c(-0.2,1.7))

# Model 4
pred <- predict(M4, type="response")
ROCRpred <- prediction(pred, mydata_conf4$BAN_1)
ROCRperf <- performance(ROCRpred, "tpr", "fpr")
plot(ROCRperf)
plot(ROCRperf, colorize=TRUE)
plot(ROCRperf, colorize=TRUE, print.cutoffs.at=seq(0,1,0.1))
plot(ROCRperf, colorize=TRUE, print.cutoffs.at=seq(0,1,0.1), text.adj=c(-0.2,1.7))

# Model 5
pred <- predict(M5, type="response")
ROCRpred <- prediction(pred, mydata_conf5$BAN_1)
ROCRperf <- performance(ROCRpred, "tpr", "fpr")
plot(ROCRperf)
plot(ROCRperf, colorize=TRUE)
plot(ROCRperf, colorize=TRUE, print.cutoffs.at=seq(0,1,0.1))
plot(ROCRperf, colorize=TRUE, print.cutoffs.at=seq(0,1,0.1), text.adj=c(-0.2,1.7))

######## ANALYSIS WITH ALTERNATIVE DEPENDENT VARIABLE ########

# Baseline model
M6 <- clm(BAN_2 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M6)

# Model with LDC control
M7 <- clm(BAN_2 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M7)

# Model with GDP per capita
M8 <- clm(BAN_2 ~ GDP_log + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M8)

# Model with rivalry
M9 <- clm(BAN_2 ~ ECI + UAV_1 + RIV_1 + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M9)

# Model with GDP per capita and rivalry
M10 <- clm(BAN_2 ~ GDP_log + UAV_1 + RIV_1 + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M10)

#### Creating regression table ####

stargazer(M6, M7, M8, M9, M10, out="Regression table 2.htm", type = "html", title="Results of ordered logit regression",
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(.1, .05, .01, .001))

######## ANALYSIS WITH ALTERNATVIE INDEPENDENT VARIABLES ########

#### Alternative measures of financial and technological capacity ####

# Model with UCAV possession
M11 <- glm(BAN_1 ~ ECI + UAV_2 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M11)
PseudoR2(M11, which = "McFadden")

# Model with SIPRI top 100
M12 <- glm(BAN_1 ~ ECI + TOP + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M12)
PseudoR2(M12, which = "McFadden")

# Model with arms exports
M13 <- glm(BAN_1 ~ ECI + ARM_log + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
          data = mydata, family = "binomial", na.action = na.omit)
summary(M13)
PseudoR2(M13, which = "McFadden")

# Model with military expenditure
M14 <- glm(BAN_1 ~ ECI + MIL_log + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M14)
PseudoR2(M14, which = "McFadden")

#### Creating regression table ####

stargazer(M2, M11, M12, M13, M14, out="Regression table 3.htm", type = "html", title="Results of logit regression",
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(.1, .05, .01, .001))

#### Alternative measures of security threats ####

# Model with high casualty bombings
M15 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + HCT_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M15)
PseudoR2(M15, which = "McFadden")

# Model with armed conflict magnitude
M16 <- glm(BAN_1 ~ ECI + UAV_1 + MEV + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M16)
PseudoR2(M16, which = "McFadden")

# Model with rivalry count
M17 <- glm(BAN_1 ~ ECI + UAV_1 + RIV_2 + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M17)
PseudoR2(M17, which = "McFadden")

# Model with rivalry with a major power
M18 <- glm(BAN_1 ~ ECI + UAV_1 + RIV_3 + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M18)
PseudoR2(M18, which = "McFadden")

#### Creating regression table ####

stargazer(M2, M15, M16, M17, M18, out="Regression table 4.htm", type = "html", title="Results of logit regression",
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(.1, .05, .01, .001))

#### Alternative measures of regime type ####

# Model with dichotmous democracy and autocracy
M19 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + DEM + AUT + HRR_1 + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M19)
PseudoR2(M19, which = "McFadden")

# Model with dichotomous anocracy
M20 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + ANO + HRR_1 + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M20)
PseudoR2(M20, which = "McFadden")

# Model with world freedom index
M21 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + WFI_1 + WFI_2 + HRR_1 + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M21)
PseudoR2(M21, which = "McFadden")

#### Creating regression table ####

stargazer(M2, M19, M20, M21, out="Regression table 5.htm", type = "html", title="Results of logit regression",
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(.1, .05, .01, .001))

#### Alternative measures of normative drivers ####

# Model with arms control treaties (signature)
M22 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + DTR_1 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M22)
PseudoR2(M22, which = "McFadden")

# Model with arms control treaties (ratification)
M23 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + DTR_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M23)
PseudoR2(M23, which = "McFadden")

# Model with number of humanitarian disarmament treaties (signed)
M24 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + HUD_1 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M24)
PseudoR2(M24, which = "McFadden")

# Model with number of humanitarian disarmament treaties (ratified)
M25 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + HUD_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M25)
PseudoR2(M25, which = "McFadden")

# Model with UNGA disarmament voting
M26 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + UND + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M26)
PseudoR2(M26, which = "McFadden")

#### Creating regression table ####

stargazer(M2, M22, M23, M24, M25, M26, out="Regression table 6.htm", type = "html", title="Results of logit regression",
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(.1, .05, .01, .001))

# Model with human rights scores
M27 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRS + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M27)
PseudoR2(M27, which = "McFadden")

# Model with political stability and absene of violence
M28 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + PSV + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M28)
PseudoR2(M28, which = "McFadden")

# Model with number of IHRL treaties (signature)
M29 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRT_1 + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M29)
PseudoR2(M29, which = "McFadden")

# Model with number of IHRL treaties (ratification)
M30 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRT_2 + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M30)
PseudoR2(M30, which = "McFadden")

# Model with UNGA human rights voting
M31 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + UNH + CCW_2 + LDC,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M31)
PseudoR2(M31, which = "McFadden")

#### Creating regression table ####

stargazer(M2, M27, M28, M29, M30, M31, out="Regression table 7.htm", type = "html", title="Results of logit regression",
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(.1, .05, .01, .001))

# Model with major power status
M32 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC + MPS,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M32)
PseudoR2(M32, which = "McFadden")

# Model with NAM membership
M33 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC + NAM,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M33)
PseudoR2(M33, which = "McFadden")

# Model with CCW membership
M34 <- glm(BAN_1 ~ ECI + UAV_1 + DIS + TER_log + POL_1 + POL_2 + HRR_1 + CCW_2 + LDC + CCW_4,
           data = mydata, family = "binomial", na.action = na.omit)
summary(M34)
PseudoR2(M34, which = "McFadden")

stargazer(M2, M32, M33, M34, out="Regression table 8.htm", type = "html", title="Results of logit regression",
          star.char = c("+", "*", "**", "***"),
          star.cutoffs = c(.1, .05, .01, .001))